{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "metadata": {},
   "outputs": [],
   "source": [
    "class LogisticRegression():\n",
    "    \n",
    "    def __init__(self, epochs=100, learningRate=0.01):\n",
    "        self._epochs = epochs\n",
    "        self._lr = learningRate\n",
    "    \n",
    "    def _foreword(self, X, Y):\n",
    "        z = np.dot(X, self._w.T)\n",
    "        a = 1 / (1 + np.exp(-z))\n",
    "        J = - np.mean(Y * np.log(a) + (1 - Y) * np.log(1 - a))\n",
    "        \n",
    "        return J, a\n",
    "    \n",
    "    def _backward(self, X, Y, a):\n",
    "        gradW = np.dot(X.T, (a - Y).T) / len(X)\n",
    "        gradB = np.mean(a - Y)\n",
    "        \n",
    "        return gradW, gradB\n",
    "    \n",
    "    def fit(self, X, Y):\n",
    "        self._w = np.zeros((len(X[0]),))\n",
    "        self._b = 0\n",
    "        self._costs = []\n",
    "        \n",
    "        for i in range(self._epochs):\n",
    "            J, a = self._foreword(X, Y)\n",
    "            gradW, gradB = self._backward(X, Y, a)\n",
    "            \n",
    "            self._w = self._w - self._lr * gradW\n",
    "            self._b = self._b - self._lr * gradB\n",
    "            \n",
    "            if i % 100 == 0 or i == self._epochs - 1:\n",
    "                self._costs.append(J)\n",
    "                print('step:{0} -- costs: {1}'.format(i, J))\n",
    "        \n",
    "    def predict(self, X):\n",
    "        z = np.dot(X, self._w.T)\n",
    "        a = 1 / (1 + np.exp(-z))\n",
    "        \n",
    "        return a > 0.5\n",
    "    \n",
    "    def accuracy(self, Y, predict):\n",
    "        return np.mean(Y == predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 155,
   "metadata": {},
   "outputs": [],
   "source": [
    "def loadDataset(path):\n",
    "    with open(path) as f:\n",
    "        texts = f.readlines()\n",
    "        \n",
    "    X = []\n",
    "    Y = []\n",
    "    \n",
    "    for line in texts:\n",
    "        parts = line.strip().split()\n",
    "        parts = [float(part) for part in parts]\n",
    "        \n",
    "        X.append(parts[:-1])\n",
    "        Y.append(parts[-1])\n",
    "        \n",
    "    X = np.array(X, dtype=float)\n",
    "    Y = np.array(Y, dtype=int)\n",
    "    \n",
    "    return X, Y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 156,
   "metadata": {},
   "outputs": [],
   "source": [
    "kTrainPath = 'horseColicTraining.txt'\n",
    "kTestPath = 'horseColicTest.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((299, 21), (299,))"
      ]
     },
     "execution_count": 157,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "XTrain, YTrain = loadDataset(kTrainPath)\n",
    "\n",
    "XTrain.shape, YTrain.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((67, 21), (67,))"
      ]
     },
     "execution_count": 158,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "XTest, YTest = loadDataset(kTestPath)\n",
    "\n",
    "XTest.shape, YTest.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 159,
   "metadata": {},
   "outputs": [],
   "source": [
    "class StandardScaler():\n",
    "    \n",
    "    def fit(self, X):\n",
    "        self._std = np.std(X, axis=0)\n",
    "        self._mean = np.mean(X, axis=0)\n",
    "        \n",
    "    def transform(self, X):\n",
    "        return (X - self._mean) / self._std"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "metadata": {},
   "outputs": [],
   "source": [
    "scaler = StandardScaler()\n",
    "\n",
    "scaler.fit(XTrain)\n",
    "\n",
    "XTrainStd = scaler.transform(XTrain)\n",
    "XTestStd = scaler.transform(XTest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "metadata": {},
   "outputs": [],
   "source": [
    "lrModel = LogisticRegression(1000, 0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "step:0 -- costs: 0.6931471805599453\n",
      "step:100 -- costs: 0.5511923786099129\n",
      "step:200 -- costs: 0.5442163958549229\n",
      "step:300 -- costs: 0.5426760155997999\n",
      "step:400 -- costs: 0.5422517630351605\n",
      "step:500 -- costs: 0.5421234801963281\n",
      "step:600 -- costs: 0.542082522816962\n",
      "step:700 -- costs: 0.5420689653921427\n",
      "step:800 -- costs: 0.5420643634041663\n",
      "step:900 -- costs: 0.5420627732447695\n",
      "step:999 -- costs: 0.5420622199793595\n"
     ]
    }
   ],
   "source": [
    "lrModel.fit(XTrainStd, YTrain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "metadata": {},
   "outputs": [],
   "source": [
    "testPrediction = lrModel.predict(XTestStd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7313432835820896"
      ]
     },
     "execution_count": 164,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lrModel.accuracy(YTest, testPrediction)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
